---
title: "Untitled"
author: "Sarah Jewett"
date: "6/24/2021"
output: html_document
---

Packages
```{r}

```


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(twitteR) # client for the Twitter REST API
# https://cran.r-project.org/web/packages/twitteR/twitteR.pdf

library(tidyverse)

# Credentials are read from environment variables (for example set in
# ~/.Renviron) so that secrets never have to be typed into this file.
# Each value falls back to "" when its variable is not set.
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET", unset = "")
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY", unset = "")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET", unset = "")

setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
# When it prompts you with the question below, answer 2: No
#    Use a local file to cache OAuth access credentials between R sessions?
#    1: Yes
#    2: No


# Test that it works by getting the last 10 tweets with the #rstats tag
rstatstweets <- searchTwitter("#rstats", n = 10) %>%
  twListToDF()

# View() opens a data viewer, which is only available in an interactive
# session; skip it when the document is being knitted.
if (interactive()) {
  View(rstatstweets)
}


```


Trying to get more than 3200 tweets from a single timeline with twitteR:
```{r}
# Ask for far more statuses than allowed to show how twitteR reacts.
guardian <- userTimeline("guardian", n = 20000)

# The call above produces:
#   Warning in statusBase(cmd, params, n, 3200, ...) :
#     statuses/user_timeline has a cap of 3200 statuses, clipping
```
No luck...

The rtweet package lets you query Twitter without a personal API key, but it is also limited to 3200 tweets per timeline request. In my experience it actually returns the full 3200, whereas twitteR may return fewer even when 3200 is requested.

get_timeline() retrieves the tweets of a given handle, returning up to 90 variables per tweet.
```{r}
# A timeline request is limited to 3200 tweets, so every handle is requested
# individually to get as many tweets per account as possible.

library(rtweet)

# Same limit for every handle.
max_tweets <- 3200

guardian <- get_timeline("guardian", n = max_tweets)
gdnpolitics <- get_timeline("GDnPolitics", n = max_tweets)

mailonline <- get_timeline("MailOnline", n = max_tweets)

thesun <- get_timeline("thesun", n = max_tweets)

thetimes <- get_timeline("thetimes", n = max_tweets)

telegraph <- get_timeline("telegraph", n = max_tweets)
telegraphnews <- get_timeline("telegraphnews", n = max_tweets)
telepolitics <- get_timeline("telepolitics", n = max_tweets)

ft <- get_timeline("ft", n = max_tweets)
ftukpolitics <- get_timeline("ftukpolitics", n = max_tweets)

dailymirror <- get_timeline("dailymirror", n = max_tweets)

independent <- get_timeline("independent", n = max_tweets)
indypremium <- get_timeline("indypremium", n = max_tweets)

# Collect every timeline downloaded above into one list so they can be
# row-bound in a single call. `ft` is included here; it was downloaded but
# previously left out of the combined data.
handles <- list(
  guardian, gdnpolitics,
  mailonline,
  thesun,
  thetimes,
  telegraph, telegraphnews, telepolitics,
  ft, ftukpolitics,
  dailymirror,
  independent, indypremium
)

library(data.table)
# use rbindlist() from the data.table package to stack the timelines
recent <- rbindlist(handles)

# Keep a copy on disk. The load() restores `recent` from that copy, so it can
# be run on its own to get the data back without repeating the download.
save(recent, file = "recent.RDA")
load("recent.RDA")
```
Next I want to plot the frequency of tweets across the period covered by the (up to) 3200 tweets collected per handle, in a variety of ways:


```{r}
library(rtweet)
library(dplyr)
library(ggplot2)
library(ggrepel)

theme_set(theme_minimal())

# Cut-off instants as explicit UTC date-times. A bare string such as
# "2021-01-01 00:00:00 UTC" is parsed in the session's local time zone when it
# is compared with a date-time, so the trailing "UTC" would have no effect.
start_2021 <- as.POSIXct("2021-01-01 00:00:00", tz = "UTC")
start_june_2021 <- as.POSIXct("2021-06-01 00:00:00", tz = "UTC")

# Draw a tweet-frequency time series with one line per Twitter handle.
#
#   tweets    data frame of tweets; must contain a `screen_name` column
#   by        time unit passed to ts_plot(), e.g. "weeks" or "months"
#   subtitle  text shown under the plot title
#   ...       further arguments forwarded to ts_plot()
#
# Returns the ggplot object so that callers can add more layers with `+`.
plot_tweet_frequency <- function(tweets, by, subtitle, ...) {
  tweets %>%
    dplyr::group_by(screen_name) %>%
    ts_plot(by, ...) +
    labs(
      x = NULL, y = NULL, color = "Twitter Handle",
      title = "Frequency of tweets by newspapers of interest",
      subtitle = subtitle,
      caption = "Data collected from Twitter's REST API via rtweet"
    )
}

# Tweets from 2021 onwards, shared by the two monthly plots below.
tweets_2021 <- recent %>%
  dplyr::filter(created_at >= start_2021)

# Monthly frequency since the start of 2021; the subtitle reports the date
# range of the tweets that are actually plotted.
plot_tweet_frequency(
  tweets_2021, "months",
  subtitle = paste0(
    format(min(tweets_2021$created_at), "%d %B %Y"), " to ",
    format(max(tweets_2021$created_at), "%d %B %Y")
  )
) +
  theme_minimal()

# Same plot with a fixed start date in the subtitle.
plot_tweet_frequency(
  tweets_2021, "months",
  subtitle = paste0(
    "01 January 2021", " to ",
    format(max(recent$created_at), "%d %B %Y")
  )
) +
  theme_minimal()

# Weekly frequency from June 2021, with handles written on the plot instead
# of in a legend.
recent %>%
  dplyr::filter(created_at >= start_june_2021) %>%
  plot_tweet_frequency(
    "weeks",
    subtitle = paste0(
      "End of May 2021", " to ",
      format(max(recent$created_at), "%d %B %Y")
    ),
    show.legend = FALSE
  ) +
  geom_text(
    aes(label = screen_name),
    check_overlap = TRUE, hjust = 1, vjust = -.5
  ) +
  theme(legend.position = "none")

# Weekly frequency up to the end of 2020. `<` (not `<=`) keeps this period
# from overlapping the 2021 plots at the cut-off instant.
tweets_2020_plot <- recent %>%
  dplyr::filter(created_at < start_2021) %>%
  plot_tweet_frequency(
    "weeks",
    subtitle = paste0(
      format(min(recent$created_at), "%d %B %Y"), " to ", "End of 2020"
    ),
    show.legend = FALSE
  ) +
  # geom_text(aes(label = screen_name), check_overlap = TRUE) +
  ggrepel::geom_text_repel(aes(label = screen_name), max.overlaps = 12) +
  theme(legend.position = "none")

tweets_2020_plot

# Save the plot under a descriptive name and pass it explicitly rather than
# relying on whichever plot happened to be created last.
ggsave("tweet_frequency_to_end_2020.jpg", plot = tweets_2020_plot)
```